Aminet 24

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 24 / Aminet 24 (1998)(GTI - Schatztruhe)[!][Apr 1998].iso / Aminet / comm / mail / Mutt089src.lha / Mutt-0.89i-AMIGA / src / rx / rxposix.c < prev next >

Wrap

C/C++ Source or Header | 1998-01-28 | 11KB | 485 lines

/*	Copyright (C) 1995, 1996 Tom Lord
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 * 
 * You should have received a copy of the GNU Library General Public License
 * along with this software; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330, 
 * Boston, MA 02111-1307, USA. 
 */

#include "rxall.h"
#include "rxposix.h"
#include "rxgnucomp.h"
#include "rxbasic.h"
#include "rxsimp.h"


/* regncomp takes a regular expression as a string and compiles it.
 *
 * PATTERN is the address of the pattern string; LEN is its length and
 * is handed to rx_parse unchanged.
 *
 * CFLAGS is a series of bits which affect compilation.
 *
 *   If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
 *   use POSIX basic syntax.
 *
 *   If REG_NEWLINE is set, then . and [^...] don't match newline.
 *   Also, regexec will try a match beginning after every newline.
 *
 *   If REG_ICASE is set, then we consider upper- and lowercase
 *   versions of letters to be equivalent when matching.
 *
 *   If REG_NOSUB is set, then when PREG is passed to regexec, that
 *   routine will report only success or failure, and nothing about the
 *   registers.
 *
 * It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
 * the return codes and their meanings.)
 *
 * PREG is zeroed before anything else happens, so any previous contents
 * are discarded without being released.  If compilation fails, the
 * case-folding table allocated for REG_ICASE is freed again and
 * preg->translate is reset to 0.
 */

#ifdef __STDC__
int
regncomp (regex_t * preg, const char * pattern, int len, int cflags)
#else
int
regncomp (preg, pattern, len, cflags)
     regex_t * preg;
     const char * pattern;
     int len;
     int cflags;
#endif
{
  int ret;
  unsigned int syntax;

  rx_bzero ((char *)preg, sizeof (*preg));
  syntax = ((cflags & REG_EXTENDED)
	    ? RE_SYNTAX_POSIX_EXTENDED
	    : RE_SYNTAX_POSIX_BASIC);

  if (!(cflags & REG_ICASE))
    preg->translate = 0;
  else
    {
      unsigned i;

      preg->translate = (unsigned char *) malloc (256);
      if (!preg->translate)
        return (int) REG_ESPACE;

      /* Map uppercase characters to corresponding lowercase ones.  */
      for (i = 0; i < CHAR_SET_SIZE; i++)
        preg->translate[i] = isupper (i) ? tolower (i) : i;
    }


  /* If REG_NEWLINE is set, newlines are treated differently.  */
  if (!(cflags & REG_NEWLINE))
    preg->newline_anchor = 0;
  else
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
      /* It also changes the matching behavior.  */
      preg->newline_anchor = 1;
    }

  preg->no_sub = !!(cflags & REG_NOSUB);

  ret = rx_parse (&preg->pattern,
		  pattern, len,
		  syntax,
		  256,
		  preg->translate);

  /* POSIX doesn't distinguish between an unmatched open-group and an
   * unmatched close-group: both are REG_EPAREN.
   */
  if (ret == REG_ERPAREN)
    ret = REG_EPAREN;

  if (!ret)
    {
      preg->re_nsub = 1;
      preg->subexps = 0;
      rx_posix_analyze_rexp (&preg->subexps,
			     &preg->re_nsub,
			     preg->pattern,
			     0);
      preg->is_nullable = rx_fill_in_fastmap (256,
					      preg->fastmap,
					      preg->pattern);

      preg->is_anchored = rx_is_anchored_p (preg->pattern);
    }
  else if (preg->translate)
    {
      /* Callers are not expected to release a regex_t whose compilation
       * failed, so give the translation table back here instead of
       * leaking it.
       */
      free (preg->translate);
      preg->translate = 0;
    }

  return (int) ret;
}


/* regcomp compiles the NUL-terminated string PATTERN into PREG by
 * calling regncomp with strlen (PATTERN) as the length.  CFLAGS and the
 * return value are those of regncomp.
 */

#ifdef __STDC__
int
regcomp (regex_t * preg, const char * pattern, int cflags)
#else
int
regcomp (preg, pattern, cflags)
     regex_t * preg;
     const char * pattern;
     int cflags;
#endif
{
  /* POSIX says a null character in the pattern terminates it, so we
   * can use strlen here in compiling the pattern.
   */

  return regncomp (preg, pattern, strlen (pattern), cflags);
}


/* Returns a message corresponding to an error code, ERRCODE, returned
 * from either regcomp or regexec.
 *
 * The message is copied into ERRBUF, truncated if necessary to fit in
 * ERRBUF_SIZE bytes and always 0-terminated; nothing is written when
 * ERRBUF_SIZE is 0.  The return value is the size needed to hold the
 * whole message including its terminating 0, whether or not it was
 * truncated.  PREG is not used.
 *
 * NOTE(review): ERRCODE indexes rx_error_msg without a range check;
 * the size of that table is not visible from this file -- confirm that
 * callers only pass codes produced by this library.
 */

#ifdef __STDC__
size_t
regerror (int errcode, const regex_t *preg,
	  char *errbuf, size_t errbuf_size)
#else
size_t
regerror (errcode, preg, errbuf, errbuf_size)
     int errcode;
     const regex_t *preg;
     char *errbuf;
     size_t errbuf_size;
#endif
{
  const char *msg;
  size_t msg_size;

  /* A null table entry is reported as "Success".  */
  msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
  msg_size = strlen (msg) + 1; /* Includes the 0.  */
  if (errbuf_size != 0)
    {
      if (msg_size > errbuf_size)
        {
          strncpy (errbuf, msg, errbuf_size - 1);
          errbuf[errbuf_size - 1] = 0;
        }
      else
        strcpy (errbuf, msg);
    }
  return msg_size;
}


/* rx_regmatch looks for a match of PREG in STRING that begins exactly
 * at START.
 *
 * Candidate end positions are tried from the largest down to the
 * smallest.  When there is no pattern, only START is tried; when
 * preg->pattern->len is non-negative, only START + len is tried;
 * otherwise every position from END down to START is tried.
 *
 * RULES is copied into a local structure whose not_eol field is
 * recomputed for each candidate end position; RULES itself is not
 * modified.
 *
 * Returns 0 on a match (filling in PMATCH[0] when PMATCH is non-null),
 * REG_NOMATCH when no candidate matches, and REG_ESPACE when a solutions
 * object can't be created or the last answer is neither rx_yes nor
 * rx_no.
 *
 * NOTE(review): for a pattern with non-negative len, START + len is
 * used as the end position even when it is greater than END -- confirm
 * that STRING is always long enough for that.
 */

#ifdef __STDC__
int
rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regmatch (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  struct rx_solutions * solutions;
  enum rx_answers answer;
  struct rx_context_rules local_rules;
  int orig_end;
  int end_lower_bound;
  int end_upper_bound;

  local_rules = *rules;
  orig_end = end;

  /* If the loop below never runs (END < START with a variable-length
   * pattern), report "no match" rather than switching on an
   * uninitialized value.
   */
  answer = rx_no;

  if (!preg->pattern)
    {
      end_lower_bound = start;
      end_upper_bound = start;
    }
  else if (preg->pattern->len >= 0)
    {
      end_lower_bound = start + preg->pattern->len;
      end_upper_bound = start + preg->pattern->len;
    }
  else
    {
      end_lower_bound = start;
      end_upper_bound = end;
    }

  end = end_upper_bound;
  while (end >= end_lower_bound)
    {
      /* Recompute not_eol for this candidate end; it depends on whether
       * the candidate is the caller's original END and, when
       * newline_anchor is set, on whether a newline sits at the
       * candidate position.
       */
      local_rules.not_eol = (rules->not_eol
			     ? (   (end == orig_end)
				|| !local_rules.newline_anchor
				|| (string[end] != '\n'))
			     : (   (end != orig_end)
				&& (!local_rules.newline_anchor
				    || (string[end] != '\n'))));
      solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
					   start, end, &local_rules, string);
      if (!solutions)
        return REG_ESPACE;

      answer = rx_next_solution (solutions);

      if (answer == rx_yes)
        {
          if (pmatch)
            {
              pmatch[0].rm_so = start;
              pmatch[0].rm_eo = end;
              pmatch[0].final_tag = solutions->final_tag;
            }
          rx_basic_free_solutions (solutions);
          return 0;
        }
      else
        rx_basic_free_solutions (solutions);

      --end;
    }

  switch (answer)
    {
    default:
    case rx_bogus:
      return REG_ESPACE;

    case rx_no:
      return REG_NOMATCH;
    }
}


/* rx_regexec scans start positions X from START through END and calls
 * rx_regmatch at each position that is not ruled out by the fastmap
 * (or, for searches longer than RX_MANY_CASES, by a pre-check run on
 * an automaton built from the simplified pattern).
 *
 * Only the first part of this function is visible here; the remainder
 * follows unchanged.
 */

#ifdef __STDC__
int
rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regexec (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  int x;
  int stat;
  int anchored;
  struct rexp_node * simplified;
  struct rx_unfa * unfa;
  struct rx_classical_system machine;

  anchored = preg->is_anchored;

  unfa = 0;
  if ((end - start) > RX_MANY_CASES)
    {
      if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
        return REG_ESPACE;
      unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
      if (!unfa)
        {
          rx_free_rexp (simplified);
          return REG_ESPACE;
        }
      rx_init_system (&machine, unfa->nfa);
      rx_free_rexp (simplified);
    }

  for (x = start; x <= end; ++x)
    {
      if (preg->is_nullable
          || ((x < end)
              && (preg->fastmap[((unsigned char *)string)[x]])))
        {
          if ((end - start) > RX_MANY_CASES)
            {
              int amt;
              if (rx_start_superstate (&machine) != rx_yes)
                {
                  rx_free_unfa (unfa);
                  return REG_ESPACE;
                }
              amt = rx_advance_to_final (&machine, string + x, end - start - x);
              if (!machine.final_tag && (amt < (end - start - x)))
                goto nomatch;
            }
          stat = rx_regmatch (pmatch, preg, rules, x, end, string);
          if (!stat || (stat != REG_NOMATCH))
            {
              rx_free_unfa (unfa);
              return stat;
            }
        }
    nomatch:
      if (anchored)
        if (